Load Dependencies

# Packages used by this analysis.
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# only removes user-created objects (attached packages and options stay as
# they are), so it does not produce a clean session, and it would wipe the
# workspace of anyone who sources this script interactively.
library(jpeg)        # readJPEG()
## Warning: package 'jpeg' was built under R version 4.2.2
library(OpenImageR)  # imageShow()
## Warning: package 'OpenImageR' was built under R version 4.2.2
library(Matrix)      # NOTE(review): no Matrix function is called in the visible code

Check the dimensions of our first image.

# Directory that holds the shoe photos. The trailing slash is part of the
# value, so file names can be appended to it directly.
files <- "jpg/"

# Read the first photo to learn the dimensions of the images.
# Because `files` already ends in "/", the file name must not start with one;
# otherwise the path is built as "jpg//RC_...".
img <- readJPEG(paste0(files, "RC_2500x1200_2014_us_53446.jpg"))

dim(img)
## [1] 1200 2500    3
# Collect the dimensions: rows, columns and colour channels of the array
img_dims <- dim(img)
row_dim <- img_dims[1]
col_dim <- img_dims[2]
channel_dim <- img_dims[3]

Plot our image

# Display the image that was just read, as a visual sanity check
imageShow(img)

Now that we have seen how to do this for one example, we will load all of our examples.

# List the image files in the directory. `pattern` is a regular expression,
# so the dot is escaped and the match is anchored to the end of the name;
# a bare ".jpg" would also match names such as "notes_jpg.txt".
filenames <- list.files(path = files, pattern = "\\.jpg$")

# Initialize the matrix where all files will be stored:
# one row per image, one column per value of the flattened image array
all_images_data <- matrix(0, nrow = length(filenames), ncol = prod(dim(img)))

# Show the dimensions of the matrix
dim(all_images_data)
## [1]      17 9000000

We know that there are 17 shoes, so the 17 rows of the matrix confirm that all of the shoes are here.

Images into a matrix

# Flatten every image into one row of `all_images_data`.
for (i in seq_along(filenames)) {
  filename <- filenames[i]
  print(paste("shoefile: ",filename))

  # Read from the same directory that was listed (`files` ends in "/"),
  # instead of repeating the directory name as a literal.
  img <- readJPEG(paste0(files, filename))

  # A vector whose length merely divides the row length would be recycled
  # silently by the assignment below, so insist on an exact match.
  if (length(img) != ncol(all_images_data)) {
    stop("Unexpected image size in file: ", filename, call. = FALSE)
  }

  # Images are stored as a single vector red-green-blue: as.vector() walks the
  # array in column-major order with the channel index varying slowest, which
  # is the same as c(img[, , 1], img[, , 2], img[, , 3]).
  all_images_data[i, ] <- as.vector(img)
}
## [1] "shoefile:  RC_2500x1200_2014_us_53446.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_53455.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_53469.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_53626.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_53632.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_53649.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_53655.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_53663.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_53697.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_54018.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_54067.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_54106.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_54130.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_54148.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_54157.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_54165.jpg"
## [1] "shoefile:  RC_2500x1200_2014_us_54172.jpg"
# Switch to one image per column (pixel values down the rows)
all_images_data <- t(all_images_data)

# Rebuild the chosen shoe as a rows x columns x channels array and display it
shoe_i <- 2
shoe_pixels <- all_images_data[, shoe_i]
imageShow(array(shoe_pixels, dim = c(row_dim, col_dim, channel_dim)))

SVD and Variance

# Center and scale every column (one image per column) before the SVD
scaled_data <- scale(all_images_data)
# Scaling can produce NaN (e.g. for a constant column); set those entries to 0
scaled_data[is.nan(scaled_data)] <- 0
svd_decomp <- svd(scaled_data)


# Share of the summed squared singular values carried by each component
squared_d <- svd_decomp$d^2
plot(
  squared_d / sum(squared_d),
  type = "b",
  xlab = "Column",
  ylab = "Prop. of variance explained",
  pch = 19
)

We now have the proportion of variance explained by each component, and we will reconstruct and plot the shoes at several cumulative-variance thresholds. We notice that as we include more columns, the cumulative variance explained increases. This is because each column represents a new component, and as we add more components the reconstructed images become more sharply defined. This is important because by examining these components (the eigenvectors, sometimes called "eigenshoes"), we can identify the key features that are most significant in differentiating the images.

80%

# Smallest number of components whose cumulative share of variance reaches 80%
var_pct <- 0.8
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]

# Low-rank reconstruction from the first `vectors` components: U_k D_k V_k'.
# drop = FALSE and diag(nrow = ) keep the product valid when vectors == 1:
# u[, 1] alone collapses to a plain vector, and diag() of a single number
# builds an identity matrix of that size instead of a 1 x 1 matrix.
keep <- seq_len(vectors)
newimage <- svd_decomp$u[, keep, drop = FALSE] %*%
  diag(svd_decomp$d[keep], nrow = vectors) %*%
  t(svd_decomp$v[, keep, drop = FALSE])

# Show one reconstructed shoe: column `shoe_i` (as set earlier in the script).
# Passing the whole matrix to array() silently keeps only its first
# row_dim * col_dim * channel_dim values, i.e. always the first shoe.
imageShow(array(newimage[, shoe_i], c(row_dim, col_dim, channel_dim)))

90%

# Shoe (column) to display for this threshold
shoe_i <- 17
# Smallest number of components whose cumulative share of variance reaches 90%
var_pct <- 0.9
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 7"
# Low-rank reconstruction from the first `vectors` components: U_k D_k V_k'.
# drop = FALSE and diag(nrow = ) keep the product valid even if vectors == 1
# (diag() of a single number would otherwise build an identity matrix).
keep <- seq_len(vectors)
newimage <- svd_decomp$u[, keep, drop = FALSE] %*%
  diag(svd_decomp$d[keep], nrow = vectors) %*%
  t(svd_decomp$v[, keep, drop = FALSE])
# Reshape the selected column back into an image array and display it
imageShow(array(newimage[, shoe_i], c(row_dim, col_dim, channel_dim)))

99%

# Shoe (column) to display for this threshold
shoe_i <- 16
# Smallest number of components whose cumulative share of variance reaches 99%
var_pct <- 0.99
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 16"
# Low-rank reconstruction from the first `vectors` components: U_k D_k V_k'.
# drop = FALSE and diag(nrow = ) keep the product valid even if vectors == 1
# (diag() of a single number would otherwise build an identity matrix).
keep <- seq_len(vectors)
newimage <- svd_decomp$u[, keep, drop = FALSE] %*%
  diag(svd_decomp$d[keep], nrow = vectors) %*%
  t(svd_decomp$v[, keep, drop = FALSE])
# Reshape the selected column back into an image array and display it
imageShow(array(newimage[, shoe_i], c(row_dim, col_dim, channel_dim)))

Conclusion

This assignment allowed us to take images of various shoes and deconstruct them into RGB matrices. We were then able to perform PCA (via SVD) on these images to illustrate the vector components that caused the most variation. As we added more vector components, we saw that the images became more defined and unique, and generally started to differentiate from each other.